################################################################################
# Purpose: Identify "treated" census blocks by proximity to shooting
# Author: Caroline Chin and Jonathan Tebes
# Last Updated: 2023-02-07
#
# Procedure: 
# Repeat for each distance, d, between 0 and 2 miles w/ increment 0.05:
#   1. For each shooting s, identify all census blocks that are within d miles of s.
#         For code efficiency, exclude pairs that are already marked treated under a smaller distance 
#   2. For (shooting, census block) pairs identified in part (1), calculate area of overlap
#   3. If overlapping area is over 75% of the census block area, mark the block as treated under the distance increment
#
# Input files:
#     - Census block shapefiles for 06037 in 2010: "raw/distances/tl_2010_06037_tabblock10"
#     - Coordinates of shootings in LA: "raw/distances/shootings_long_lat.csv"
# Output files:
#     - Census blocks where 75% percent of the block is within Y miles of a shooting 
#              "raw/distances/processed/shooting_exposed_cb_<Y>mi.csv"
################################################################################

#-------------------------------------------------------------------------------
# Setup 
#-------------------------------------------------------------------------------
rm(list = ls())
# Load packages and install if needed
if (!require("pacman")) install.packages("pacman")
pacman::p_load(dplyr, units, sf, leaflet, rgdal, parallel,udunits2)

# Define file names
# Update the below path to local directory
setwd('/Users/jtebes/Dropbox/voting_police_violence/data_and_code/replication')
census_block_shp_filename <- paste("raw/distances", "tl_2010_06037_tabblock10", sep = "/")
shooting_lat_long_filename <- paste("raw/distances", "shootings_long_lat.csv", sep = "/")
shooting_exposed_cb_outdir <- paste("raw/distances/processed")

# Constants
share_block_covered <- 0.75 # share of census block within X miles of shooting used to determine treatment status
distance_from_shooting_vec <- seq(0.05, 2, 0.05) # list of distances from shooting

#-------------------------------------------------------------------------------
# Calculate Census Block Overlap 
#-------------------------------------------------------------------------------

# Create baseline census block and shootings dataset ---------------------------
# Load census block shapefile
census_block_shp <- st_read(census_block_shp_filename)
shoot_census_block_shp <- census_block_shp %>% 
  mutate(cb_area = st_area(.)) %>% 
  select(cb_geoid = GEOID10, cb_area) %>% 
  mutate(cb_row_id = 1:n())

# Load shootings coordinates
shooting_coords <- read.csv(shooting_lat_long_filename, 
                            colClasses = c('cb' = 'character'), 
                            stringsAsFactors=F) %>% 
  st_as_sf(coords = c("sht_long", "sht_lat"), 
           remove=F, 
           crs = st_crs(shoot_census_block_shp)) %>% 
  mutate(shooting_row_id = 1:n())

# Functions for getting treated census blocks (based share of block within distance of shooting) ---------------------------

# Function get_intersect_pairs: Get shooting X census block pairs that are overlapping
get_intersect_pairs <- function(shooting_r, shoot_census_block_shp) {
  
  # get block X shooting pairs that intersect
  intersersect_pairs_idx <- shooting_r %>% select(sht_id) %>% 
    st_intersects(shoot_census_block_shp %>% select(cb_geoid))
  assertthat::assert_that(length(intersersect_pairs_idx)==dim(shooting_r)[1])
  
  # get list of shooting IDs and census block GEOIDs
  sht_id_list <- shooting_r %>% pull(sht_id)
  cb_geoid_list <- shoot_census_block_shp %>% pull(cb_geoid)
  
  # convert list of intersects indices to shooting ID and GEOIDs 
  res <- lapply(seq(1,dim(intersersect_pairs_idx)[1]),
                function(sht_id_idx) {
                  sht_id <- sht_id_list[sht_id_idx]
                  cb_geoid_vec <- cb_geoid_list[intersersect_pairs_idx[[sht_id_idx]]]
                  idx_pairs <- cbind(sht_id, cb_geoid_vec)
                }) 
  
  # convert to data frame
  intersersect_pairs <- do.call(rbind, res) %>% as.data.frame()
  
  # create output object
  out <- list()
  out$intersersect_pairs <- intersersect_pairs
  out$intersersect_pairs_idx <- intersersect_pairs_idx
  
  return(out)
}


# Function get_treated_cb: Get shooting X census block pairs where <share_block_covered> share of 
#           each census block is within <d_mi> miles of a shooting
get_treated_cb <- function(d_mi, share_block_covered, 
                           shooting_coords, shoot_census_block_shp, 
                           excl_pairs,
                           shooting_exposed_cb_outdir) {
  start_time <- Sys.time()
  print(paste("START", d_mi, "miles", start_time))
  
  # distance in meters 
  d <- udunits2::ud.convert(d_mi, "mi", "m")
  
  # Generate circular polygon around each shooting with radius d
  shooting_r <- st_buffer(shooting_coords, dist=d)
  shooting_r <- shooting_r %>% 
    rename(sht_cb_geoid = cb) 
  
  # Get list of intersecting shooting X census block pairs
  res <- get_intersect_pairs(shooting_r, shoot_census_block_shp)
  intersersect_pairs <- res$intersersect_pairs
  intersersect_pairs_idx <- res$intersersect_pairs_idx
  
  # Get intersecting geometries of census block pairs for each shooting
  intersect_list <- lapply(seq(1,dim(intersersect_pairs_idx)[[1]]),
                           function(x, excl_pairs){
                             # get shooting geom
                             shoot_geom <- shooting_r[x,]
                             
                             # get relevant census block geom: all intersecting census blocks without census blocks
                             #    that have already been linked to the shooting using a smaller radius
                             cb_idx <- setdiff(intersersect_pairs_idx[[x]], excl_pairs[[x]])          
                             cb_geom <- shoot_census_block_shp[cb_idx,]    
                             
                             # calculate overlapping area
                             intersection <- st_intersection(shoot_geom, cb_geom)
                             
                             # keep census blocks where a sufficiently high share of the block is close to the shooting
                             intersection <- intersection %>% 
                               mutate(overlap_area = st_area(.),
                                      pct_covered = as.numeric(overlap_area/cb_area)) %>% 
                               st_set_geometry(NULL) %>% 
                               subset(pct_covered >= share_block_covered) 

                             return(intersection)
                           },
                          excl_pairs=excl_pairs)
  intersections <- do.call(rbind, intersect_list) %>% as.data.frame()

  # generate output data frame 
  intersections <- intersections %>% 
    mutate(distance = d_mi) %>% 
    select(distance, sht_id, sht_cb_geoid, sht_long, sht_lat, cb_geoid, pct_covered) 
  
  # save file
  d_mi_str = d_mi %>% stringr::str_replace("\\.","pt")
  outfile <- paste0(shooting_exposed_cb_outdir, "/shooting_exposed_cb_", d_mi_str, ".csv")
  intersections %>% write.csv(outfile, row.names=F)
  
  # identify census block X shooting pairs that can be excluded from future iterations
  excl_pairs_new <- lapply(seq(1,length(intersect_list)) , function(x) {
    # identify row IDs of treated census blocks
    new_cb <- intersect_list[[x]]$cb_row_id
    
    # check that all new census blocks are excluded from previous census blocks
    assertthat::assert_that(length(intersect(new_cb,excl_pairs[[x]]))==0) # ADD FOR EXCL PAIRS
    
    # add new census blocks to rows that will be excluded in future iterations
    excl_pairs[[x]] = c(excl_pairs[[x]], new_cb)
  })
  
  # generate output object
  out <- list()
  out[['intersections']] <- intersections
  out[['excl_pairs']] <- excl_pairs_new
  
  print(paste("Total Time", Sys.time()-start_time))
  
  return(out)
}


# exclude pairs already identified as treated with a smaller distance around shooting.
# initialize excluded pairs dictionary with empty list of census blocks for each shooting
excl_pairs <- lapply(shooting_coords$shooting_row_id, function(x){c()})

# Get treated census blocks for each distance ---------------------------
for (d_mi in distance_from_shooting_vec) {
  
  res <- get_treated_cb(d_mi, share_block_covered = share_block_covered,
                 shooting_coords = shooting_coords,
                 excl_pairs = excl_pairs,
                 shoot_census_block_shp = shoot_census_block_shp,
                 shooting_exposed_cb_outdir = shooting_exposed_cb_outdir)
  treated_cb <- res$intersections
  excl_pairs <- res$excl_pairs
}

